# import Python libraries
import pandas as pd
from pandas import read_csv
import plotly.graph_objects as go
import plotly.express as px
from IPython.display import YouTubeVideo
| Event data (Atletico Madrid) | |
|---|---|
![]() |
|
| Image source: Friends of Tracking GitHub | Image source: Friends of Tracking YouTube |
Data source: https://github.com/metrica-sports/sample-data
# Load the Metrica Sports sample event log (Sample_Game_1) straight from GitHub.
df_soccer = read_csv(
    "https://raw.githubusercontent.com/metrica-sports/sample-data/"
    "master/data/Sample_Game_1/Sample_Game_1_RawEventsData.csv"
)
# Bare expression: in a notebook this renders the DataFrame as the cell output.
df_soccer
| Team | Type | Subtype | Period | Start Frame | Start Time [s] | End Frame | End Time [s] | From | To | Start X | Start Y | End X | End Y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Away | SET PIECE | KICK OFF | 1 | 1 | 0.04 | 0 | 0.00 | Player19 | NaN | NaN | NaN | NaN | NaN |
| 1 | Away | PASS | NaN | 1 | 1 | 0.04 | 3 | 0.12 | Player19 | Player21 | 0.45 | 0.39 | 0.55 | 0.43 |
| 2 | Away | PASS | NaN | 1 | 3 | 0.12 | 17 | 0.68 | Player21 | Player15 | 0.55 | 0.43 | 0.58 | 0.21 |
| 3 | Away | PASS | NaN | 1 | 45 | 1.80 | 61 | 2.44 | Player15 | Player19 | 0.55 | 0.19 | 0.45 | 0.31 |
| 4 | Away | PASS | NaN | 1 | 77 | 3.08 | 96 | 3.84 | Player19 | Player21 | 0.45 | 0.32 | 0.49 | 0.47 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1740 | Home | PASS | NaN | 2 | 143361 | 5734.44 | 143483 | 5739.32 | Player12 | Player13 | 0.60 | 0.33 | 0.19 | 0.95 |
| 1741 | Home | PASS | NaN | 2 | 143578 | 5743.12 | 143593 | 5743.72 | Player13 | Player4 | 0.09 | 0.88 | 0.14 | 0.69 |
| 1742 | Home | BALL LOST | INTERCEPTION | 2 | 143598 | 5743.92 | 143618 | 5744.72 | Player4 | NaN | 0.13 | 0.69 | 0.07 | 0.61 |
| 1743 | Away | RECOVERY | BLOCKED | 2 | 143617 | 5744.68 | 143617 | 5744.68 | Player16 | NaN | 0.05 | 0.62 | NaN | NaN |
| 1744 | Away | BALL OUT | NaN | 2 | 143622 | 5744.88 | 143630 | 5745.20 | Player16 | NaN | 0.05 | 0.63 | 0.03 | 1.01 |
1745 rows × 14 columns
# Plot every event at its starting coordinates, coloured by event type;
# the hover tooltip additionally shows the event's subtype.
px.scatter(
    df_soccer,
    x="Start X",
    y="Start Y",
    color="Type",
    hover_data=["Subtype"],
    title="All events during the game by on-field location",
)
# Tally how many rows (events) were recorded for each event type.
Count = (
    df_soccer
    .groupby("Type")
    .size()
    .reset_index(name="Count")
)
# Display the tally with the most frequent event type first.
Count.sort_values(by="Count", ascending=False)
| Type | Count | |
|---|---|---|
| 5 | PASS | 799 |
| 6 | RECOVERY | 278 |
| 0 | BALL LOST | 257 |
| 3 | CHALLENGE | 233 |
| 7 | SET PIECE | 77 |
| 1 | BALL OUT | 51 |
| 8 | SHOT | 24 |
| 4 | FAULT RECEIVED | 22 |
| 2 | CARD | 4 |
# Bar chart of the per-type event totals, one colour per event type.
# NOTE: despite the `_df` suffix, `events_df` holds the figure returned by px.bar.
events_df = px.bar(
    Count,
    x="Type",
    y="Count",
    color="Type",
    title="Total counts for types of events",
)
# Order the bars along the x axis from the largest total to the smallest.
events_df.update_layout(xaxis=dict(categoryorder="total descending"))
# Timeline view: event start time on the x axis, event type on the y axis,
# coloured by team; the hover tooltip also shows the subtype.
px.scatter(
    df_soccer,
    x="Start Time [s]",
    y="Type",
    hover_data=["Subtype"],
    color="Team",
    title="Timeline of events by type for Away and Home",
)
# Timeline of events for a single player: keep only the rows where the Home
# team's Player10 is the originating ("From") player, then plot by event type.
px.scatter(
    df_soccer[(df_soccer["Team"] == "Home") & (df_soccer["From"] == "Player10")],
    x="Start Time [s]",
    y="Type",
    color="Type",
    title="Timeline of events by type for player 10 on Home team",
)
# Starting positions of all Home-team passes in period 2.
# Every selected row has Period == 2, so size="Period" gives all markers the
# same size value, with size_max=6 as the marker size cap.
px.scatter(
    df_soccer[
        (df_soccer["Team"] == "Home")
        & (df_soccer["Period"] == 2)
        & (df_soccer["Type"] == "PASS")
    ],
    x="Start X",
    y="Start Y",
    size="Period",
    size_max=6,
    title="Home team passes, second half",
)
%%html
<iframe width="1024" height="600" src="https://plotly.com/python/" frameborder="0"></iframe>
%%html
<h3>🔎 Which techniques lead to goal scoring?</h3><br><iframe width="1024" height="400" src="https://www.uefa.com/uefachampionsleague/season=2021/statistics/round=2001252/clubs/kind=goaltypes/index.html" frameborder="0" ></iframe>
# Bayern's goals broken down by how they were scored.
# NOTE(review): the counts are hard-coded, presumably taken from the UEFA
# statistics page embedded above — confirm before reuse.
labels = ["left foot", "right foot", "header", "other"]
values = [4, 11, 5, 0]
px.bar(
    x=labels,
    y=values,
    title="Bayern: Type of goal",
)
%%html
<h3>🔎 Comparing teams: Do similar techniques lead to goal scoring?</h3><br><iframe width="1024" height="400" src="https://www.uefa.com/uefachampionsleague/season=2021/statistics/round=2001252/clubs/kind=goaltypes/index.html" frameborder="0" ></iframe>
# Bar chart with one trace per team, comparing goal counts by goal type.
goal_type = ["left foot", "right foot", "header", "other"]
bar_goal_type = go.Figure()
bar_goal_type.add_trace(go.Bar(name="Bayern", x=goal_type, y=[4, 11, 5, 0]))
bar_goal_type.add_trace(go.Bar(name="Barcelona", x=goal_type, y=[6, 10, 1, 1]))
# update_layout returns the figure, so as the last expression it is displayed.
bar_goal_type.update_layout(title_text="Bayern versus Barcelona: Type of goal")
%%html
<h3>🔎 How do on target goals and unsuccessful goal attempts compare?</h3><br><iframe width="1024" height="400" src="https://www.uefa.com/uefachampionsleague/season=2021/statistics/round=2001252/clubs/kind=attempts/index.html" frameborder="0" ></iframe>
# Pie chart of Barcelona's goal attempts, split by outcome (hard-coded counts).
px.pie(
    names=["On target", "Off target", "Blocked", "Against woodwork"],
    values=[48, 38, 22, 4],
    title="Barcelona: Goal attempts",
)
Data entered into the CSV file on March 10, 2021, prior to the knockout stage.
%%writefile possession.csv
Passes completed (%),Average ball possession (%),Goal difference,Average scored per game,Team
83,50,-8,0.5,Rennes
82,50,-8,0.33,Olympiacos
82,47,-9,0.67,Zenit
86,45,-9,0.67,Dynamo_Kyiv
77,48,-7,1.67,Salzburg
80,45,-11,1.17,Istanbul_Basaksehir
76,40,7,1.75,Porto
73,46,-9,0.67,Midtjylland
75,40,-5,1,Krasnodar
77,42,-5,0.83,Lokomotiv_Moskva
91,56,8,2.25,Barcelona
89,60,3,1.71,Real_Madrid
88,59,14,2.14,Man_City
86,51,8,2.13,Dortmund
86,61,16,3.14,Bayern
88,54,10,2.25,Juventus
85,54,13,2.14,Chelsea
86,61,0,1.63,Sevilla
82,53,9,1.75,Liverpool
87,53,10,2.25,Paris
80,51,-3,1.38,Leipzig
81,48,-2,1,Atletico
84,43,5,2.29,Monchengladbach
84,46,1,1.71,Lazio
80,45,1,1.43,Atalanta
81,55,0,1.17,Ajax
85,51,-2,1.17,Internazionale
82,51,-2,1.33,Club_Brugge
84,50,5,2.5,Man_United
86,47,-7,0.83,Shakhtar_Donetsk
85,47,-12,0.83,Ferencvaros
84,50,-11,0.33,Marseille
Overwriting possession.csv
# Load the per-team possession statistics from possession.csv.
possession_df = read_csv("possession.csv")
# Display the teams ranked from the best to the worst goal difference.
possession_df.sort_values(by="Goal difference", ascending=False)
| Passes completed (%) | Average ball possession (%) | Goal difference | Average scored per game | Team | |
|---|---|---|---|---|---|
| 14 | 86 | 61 | 16 | 3.14 | Bayern |
| 12 | 88 | 59 | 14 | 2.14 | Man_City |
| 16 | 85 | 54 | 13 | 2.14 | Chelsea |
| 19 | 87 | 53 | 10 | 2.25 | Paris |
| 15 | 88 | 54 | 10 | 2.25 | Juventus |
| 18 | 82 | 53 | 9 | 1.75 | Liverpool |
| 13 | 86 | 51 | 8 | 2.13 | Dortmund |
| 10 | 91 | 56 | 8 | 2.25 | Barcelona |
| 6 | 76 | 40 | 7 | 1.75 | Porto |
| 22 | 84 | 43 | 5 | 2.29 | Monchengladbach |
| 28 | 84 | 50 | 5 | 2.50 | Man_United |
| 11 | 89 | 60 | 3 | 1.71 | Real_Madrid |
| 24 | 80 | 45 | 1 | 1.43 | Atalanta |
| 23 | 84 | 46 | 1 | 1.71 | Lazio |
| 25 | 81 | 55 | 0 | 1.17 | Ajax |
| 17 | 86 | 61 | 0 | 1.63 | Sevilla |
| 26 | 85 | 51 | -2 | 1.17 | Internazionale |
| 21 | 81 | 48 | -2 | 1.00 | Atletico |
| 27 | 82 | 51 | -2 | 1.33 | Club_Brugge |
| 20 | 80 | 51 | -3 | 1.38 | Leipzig |
| 9 | 77 | 42 | -5 | 0.83 | Lokomotiv_Moskva |
| 8 | 75 | 40 | -5 | 1.00 | Krasnodar |
| 4 | 77 | 48 | -7 | 1.67 | Salzburg |
| 29 | 86 | 47 | -7 | 0.83 | Shakhtar_Donetsk |
| 0 | 83 | 50 | -8 | 0.50 | Rennes |
| 1 | 82 | 50 | -8 | 0.33 | Olympiacos |
| 7 | 73 | 46 | -9 | 0.67 | Midtjylland |
| 3 | 86 | 45 | -9 | 0.67 | Dynamo_Kyiv |
| 2 | 82 | 47 | -9 | 0.67 | Zenit |
| 5 | 80 | 45 | -11 | 1.17 | Istanbul_Basaksehir |
| 31 | 84 | 50 | -11 | 0.33 | Marseille |
| 30 | 85 | 47 | -12 | 0.83 | Ferencvaros |
# Scatter plot of goal difference against average ball possession, one point
# per row of possession_df, with an "ols" trendline overlaid.
# NOTE: per the Plotly Express documentation, trendline="ols" relies on the
# statsmodels package being installed.
# Fix: the title previously misspelled "possession" as "possesion".
scatter_df = px.scatter(
    possession_df,
    x="Average ball possession (%)",
    y="Goal difference",
    trendline="ols",
    title="Relationship between average ball possession (%) and goal difference",
)
scatter_df.show()
# Same possession-versus-goal-difference scatter, now with one colour per team
# and marker size driven by the "Average scored per game" column.
# Rebinds `scatter_df`, replacing the trendline figure created earlier.
# Fix: the title previously misspelled "possession" as "possesion".
scatter_df = px.scatter(
    possession_df,
    x="Average ball possession (%)",
    y="Goal difference",
    size="Average scored per game",
    color="Team",
    title="Relationship between average ball possession (%) and goal difference by team",
)
scatter_df.show()